<?php

include("utilities.php");
include("phpHTMLParser.php");

// Script entry point: these three calls run, in this order, every time the file is executed.
// printxmlRequest() and printxmlResponse() are not defined in this file; presumably they
// come from utilities.php (TODO confirm). Both receive the operation name 'crawlSite'.
printxmlRequest('crawlSite');
// Fills the global $xmlResponse with a <list>...</list> element (see parseXMLStr() below).
parseXMLStr();
printxmlResponse('crawlSite');

/**
 * Reads the crawl parameters from the request and runs the crawl.
 *
 * Request parameters:
 *   - url:  page to crawl; falls back to a built-in default when missing or empty.
 *   - home: base URL handed to crawlIt() as its $urlHome argument; falls back to a
 *           built-in default when missing or empty.
 *
 * Everything crawlIt() appends to the global $xmlResponse is wrapped in a
 * <list>...</list> element.
 *
 * NOTE(review): 'url' is caller-supplied and is handed to crawlIt(), which passes it to
 * site_get_contents(). If this endpoint is reachable by untrusted clients, consider
 * restricting the allowed hosts.
 */
function parseXMLStr()
{
	global $xmlResponse;

	// isset() avoids the "undefined index" notice/warning when a parameter is absent.
	$url  = isset($_REQUEST['url'])  ? $_REQUEST['url']  : "";
	$home = isset($_REQUEST['home']) ? $_REQUEST['home'] : "";

	// Array-valued parameters (e.g. url[]=x) are treated like a missing value
	// instead of being passed on as a non-string.
	if (!is_string($url) || $url === "")
		$url = "http://127.0.0.1/~tagger/IIT_CR1.html";
	if (!is_string($home) || $home === "")
		$home = "http://www.iit.edu/~cr/passwd-only/";

	$xmlResponse .= "<list>";
	crawlIt($url, $home);
	$xmlResponse .= "</list>";
}

/**
 * Crawls one page and registers tags for every group of four links found on it.
 *
 * The page at $url is fetched with site_get_contents() and its <a> elements are
 * extracted with phpHTMLParser. Five root tags are first registered against $urlHome.
 * The qualifying links are then consumed in groups of four, in document order:
 * company, profile, priority, status. Each completed group yields twelve addTags()
 * calls and four elements appended to the global $xmlResponse.
 *
 * A link qualifies when both its href and its innerHTML are non-empty and its
 * innerHTML does not start with "Page".
 *
 * @param string $url     Page to fetch and scan for links.
 * @param string $urlHome Site tagged with the root tags; also passed to getFullSite()
 *                        together with each link's href.
 */
function crawlIt($url, $urlHome)
{
    global $HTTP_RAW_POST_DATA, $xmlDoc, $xmlResponse, $xmlMessage, $result, $debug;

    $page       = site_get_contents($url);
    $htmlParser = new phpHTMLParser($page);
    $document   = $htmlParser->parse_tags(array("a", "title"));
    $anchors    = $document->getTagsByName("a");

    // Root tags, all registered against the home site.
    foreach (array("/Companies", "/Facts", "/Profile", "/Priority", "/Status") as $rootTag) {
        addTags($urlHome, $rootTag);
    }

    // Position inside the current group of four -> the field that link fills.
    $slotFields = array(1 => "company", 2 => "profile", 3 => "priority", 4 => "status");
    $names = array();
    $urls  = array();
    $slot  = 1;

    foreach ($anchors as $anchor) {
        if ($anchor->href == NULL || $anchor->innerHTML == NULL) {
            continue;
        }
        // Links whose text starts with "Page" are skipped.
        if (substr($anchor->innerHTML, 0, 4) === "Page") {
            continue;
        }

        $field         = $slotFields[$slot];
        $names[$field] = getNiceName($anchor->innerHTML);
        $urls[$field]  = getFullSite($urlHome, htmlspecialchars_decode($anchor->href));

        if ($slot == 4) {
            $companyName = $names["company"];

            // Per-category tags: /Facts/<company>, then /<Category>/<value>/<company>.
            addTags($urls["company"], "/Facts/" . $companyName);
            $categoryPrefixes = array(
                "profile"  => "/Profile/",
                "priority" => "/Priority/",
                "status"   => "/Status/",
            );
            foreach ($categoryPrefixes as $detail => $prefix) {
                addTags($urls[$detail], $prefix . $names[$detail] . "/" . $companyName);
            }

            // Two company indexes: by profile, and by the first character of the name.
            $indexRoots = array(
                "/Companies/ByProfile/" . $names["profile"] . "/" . $companyName,
                "/Companies/ByName/" . $companyName[0] . "/" . $companyName,
            );
            foreach ($indexRoots as $indexRoot) {
                addTags($urls["company"], $indexRoot);
                foreach (array("profile", "priority", "status") as $detail) {
                    addTags($urls[$detail], $indexRoot . "/" . $detail);
                }
            }

            // NOTE(review): names are appended without XML escaping; confirm that
            // getNiceName() returns XML-safe text.
            foreach ($slotFields as $element) {
                $xmlResponse .= "<" . $element . ">" . $names[$element] . "</" . $element . ">";
            }
        }

        // An empty company name does not advance the slot, so the next qualifying
        // link is read into the same position.
        if ($names["company"] != "") {
            $slot = ($slot % 4) + 1;
        }
    }
}



/**
 * Runs the external `tag add <site> <tag>` command and records it in the response.
 *
 * Both arguments can originate from request parameters or from crawled HTML, so each
 * is passed through escapeshellarg() before being placed on the command line.
 *
 * The executed command is appended to the global $xmlResponse inside a <command>
 * element, with &, < and > escaped so the response stays well-formed XML. The exit
 * status and output of the command are not inspected.
 *
 * @param string $siteName URL or site identifier to tag.
 * @param string $tagName  Tag path to attach, e.g. "/Facts/Acme".
 */
function addTags($siteName, $tagName)
{
	global $xmlResponse;

	// escapeshellarg() quotes each value as a single shell word, so embedded quotes,
	// `$`, backticks or `;` cannot break out of the argument.
	$command  = "tag add " . escapeshellarg((string) $siteName) . " ";
	$command .= escapeshellarg((string) $tagName);

	exec($command);

	// "&" must be replaced first so the entities produced for "<" and ">" are not
	// re-escaped. str_replace() is used so bytes in any encoding pass through untouched.
	$xmlSafeCommand = str_replace(
		array("&", "<", ">"),
		array("&amp;", "&lt;", "&gt;"),
		$command
	);
	$xmlResponse .= "<command>" . $xmlSafeCommand . "</command>";
}

?>
